##Multivariate resemblance##
#libraries sources needed and stuff
library(vegan)
library(permute)
library(simba)
library(cluster)
library(ecodist)

#load go data
methtis<-read.csv('100tiss.csv',header=TRUE,row.names=1)
methtis.log<-log(methtis+1)
hist.plots(methtis.log)
methtis.jac<-sim(methtis,method="euclid")
methtis.jac

methtis.pcoa<-cmdscale(methtis.jac,k=35,eig=TRUE,add=TRUE)
methtis.pcoa
#want to know the % variation explained by any principal component? then divide each eigenvalue by the sum of the eigenvalues across all PCs. Below looking at first 5 PCs
methtis.pcoa$eig[1:5]/sum(methtis.pcoa$eig)*100
#look for signifiance of each PC by compare eigenvalues to expectations according to broken stick model
plot(methgo.pcoa$eig/sum(methgo.pcoa$eig)*100-bstick(45)*100,xlab="PC", ylab="Actual-random percent variation explained") 
abline(h=0)
#look at ordination plot of PC and PC2
ordiplot(methtis.pcoa, choices=c(1,2), type="text", display="sites", xlab="PC1 (1%)", ylab="PC2 (1%)")

#Now calculate the PCA loadings (i.e. variable weights)
#here performs a linear regression between each of the original desriptors (species)and the scores from each PC. A permutation test is used to assess statistical significance
vec.methtis<-envfit(methtis.pcoa$points, methtis.log,perm=1000)
#look at values
vec.methtis

#now plot the eigenvectors on the ordi plot
ordiplot(methtis.pcoa, choices=c(1,2), type="text", display="sites", xlab='PC1 (60%)', ylab="PC2 (10%)")
#same plot just w/0 labels
ordiplot(methtis.pcoa, choices=c(1,2), xlab='PC1 (60%)', ylab="PC2 (10%)")
plot(vec.methtis, p.max=0.9, col="blue")
#perform NMBS on log transformed data distrance is distance coefficient to use (default is bray-curtis), k is the number of ordnation axes to generate, autotransform default is TRUE, will transform the data first (usually square root), trymax is the number of random starts to try, default is 20
methtis.nmds<-metaMDS(methtis.log,distance='bray', k=2, autotransform=FALSE, trymax=100)
#look at it
methtis.nmds
#get a list of objects resulting from the analysis
names(meththis.nmds)
#evaluate the number of dimensions with a scree plot
nmds.scree(methtis.log, distance='bray', k=10, autotransform=FALSE, trymax=20)
#or try the randomization thing
nmds.monte(methtis.log, distance='bray', k=3, autotransform=FALSE, trymax=20)
#how good a job does NMDS do? check by looking at corrlation between the caclulated dissimilarities and the plotted values. Specifically, plot original dissimilarities and euclidean distances in the ordination using stressplot()
stressplot(methtis.nmds)
#look at a plot of the NMDS
plot(methtis.nmds, type='n')
text(methtis.nmds, labels=row.names(meth))
#if we'd like to see how a particular descriptor (in this case, a weird fish abundance) changes with location we can make te symbol size proportional to log abundance
plot(meth.nmds, type='n')
points(meth.nmds,cex=meth.log$X.CG.in.MBD.exon)
#if we want to look at the sample scores, which are the coordinates of the samples in k-dimensional ordination, they are stored in the object names points
meth.nmds$points
#to calculate species loadings (i.e. variable weights) on each derived axis, use envfit with the NMDS scores
vec.meth<-envfit(meth.nmds$points, meth.log, perm=1000)
vec.meth
#NOW these loadings can be plotted
ordiplot(meth.nmds, choices=c(1,2), xlab='Axis 1', ylab='Axis 2',)
plot(vec.meth, p.max=0.01, col='blue')

library(RColorBrewer)
library(gplots)
#ok, it worked with this template, so now I'm going to try pulling in all the genes. log transforming the data. saving the file as a txt file. then changing to a dat
go.exp<-read.csv("methgo.csv",header=TRUE,row.names=1)
#transpose
gologt<-t(go.exp)
mat=data.matrix(go.exp)
pdf("gooys2.pdf", height=10, width=10)
heatmap.2(mat,
Rowv=TRUE,
Colv=TRUE,
#    dendrogram= c("none"),
distfun = dist,
hclustfun = hclust,
xlab = "X data", ylab = "Y data",
key=TRUE,
keysize=1,
trace="none",
density.info=c("none"),
margins=c(10, 8),
col=brewer.pal(10,"PiYG")
#    col=redgreen(75),
)
dev.off()